#Packages required (each can be installed with install.packages("package name") and then loaded with library(package name)):
#1. readxl
#2. lfe
#3. stargazer

library(readxl)
library(lfe)
library(stargazer)

# Read the two anonymised workbooks from the working directory. `by_author`
# feeds the author-level panel and `by_paper` feeds the paper-level panel.
by_author <- readxl::read_excel(path = "./By_Author_Anon.xlsx")
by_paper <- readxl::read_excel(path = "./By_Paper_Anon.xlsx")

###Below is the code for the difference-in-differences analyses looking at papers.
# Build a paper-by-day panel: one row for every combination of a paper id and
# a calendar day in the study window.
paper_ids <- unique(by_paper[["id"]])

# NOTE(review): the window endpoints are parsed as day/month/year (2 Jan 2015
# to 11 Aug 2022), whereas initial_date_submitted below is parsed as
# month/day/year -- confirm that this is the intended window.
dates <- seq(
  from = as.Date("2/1/2015", format = "%d/%m/%Y"),
  to = as.Date("11/8/2022", format = "%d/%m/%Y"),
  by = "days"
)

# The two inputs share no column names, so merge() returns their Cartesian
# product (every id paired with every date).
panel <- merge(paper_ids, dates)
names(panel) <- c("id", "date")

# Attach the paper-level covariates to every paper-day row (left join on id).
panel <- merge(
  x = panel,
  y = by_paper[, c("id", "solo_auth", "all_women_team",
                   "initial_date_submitted", "covid_paper",
                   "women_solo_auth")],
  by = "id",
  all.x = TRUE
)

# After the merge, id is column 1 and date is column 2; sort on both.
panel <- panel[order(panel[["id"]], panel[["date"]]), ]

# covid: 1 for days on or after 14 March 2020, 0 before.
panel[["covid"]] <- with(
  panel,
  ifelse(test = date >= as.Date("2020-03-14"), yes = 1, no = 0)
)

panel[["initial_date_submitted"]] <- as.Date(
  panel[["initial_date_submitted"]],
  format = "%m/%d/%Y"
)

# submitted: 1 from the paper's initial submission date onward, 0 before.
panel[["submitted"]] <- with(
  panel,
  ifelse(test = date >= initial_date_submitted, yes = 1, no = 0)
)

# Paper-level difference-in-differences models (Tables A2-A4).
#
# Each felm() call regresses the daily `submitted` indicator on a paper-type
# indicator interacted with `covid`. The formula parts `| 0 | 0 | id` request
# no fixed effects, no instruments, and standard errors clustered by paper id.
# All models keep only rows with covid_paper == 0.
#
# lfe and stargazer are attached once at the top of the script, so they are
# not re-loaded here.

#Model for Table A2: The Likelihood of a Paper Being Written by a Woman Alone Pre and Post Covid
mod2 <- felm(
  submitted ~ women_solo_auth * covid | 0 | 0 | id,
  data = panel,
  subset = panel$covid_paper == 0
)
summary(mod2)
# The table is written to the working directory, like the other tables and the
# input data; the previous "Desktop/" relative path failed whenever the
# working directory had no Desktop sub-folder.
stargazer(
  mod2,
  type = "text",
  dep.var.labels = c("Whether a Paper is Submitted"),
  title = "The Likelihood of a Paper Being Written by a Woman Alone Pre- and Post- Covid",
  digits = 1,
  out = "./PAGmod2.html",
  covariate.labels = c(
    "Paper with a Solo Woman Author",
    "Covid-19",
    "Paper with a Solo Woman Author X Covid-19"
  )
)

#Model for Table A3: The Likelihood of a Paper Being Written by a Solo Author Pre and Post Covid
mod3 <- felm(
  submitted ~ solo_auth * covid | 0 | 0 | id,
  data = panel,
  subset = panel$covid_paper == 0
)
summary(mod3)
stargazer(
  mod3,
  type = "text",
  dep.var.labels = c("Whether a Paper is Submitted"),
  title = "The Likelihood of a Paper Being Written by a Solo Author Pre and Post Covid",
  digits = 1,
  out = "./PAGmod3.html",
  covariate.labels = c(
    "Solo Authored Paper",
    "Covid-19",
    "Solo Authored Paper X Covid-19"
  )
)

#Model for Table A4: The Likelihood of an All-Women Team Paper (Given a Team-Authored Paper) Pre and Post Covid
# Restricted further to team-authored papers (solo_auth == 0).
mod4 <- felm(
  submitted ~ all_women_team * covid | 0 | 0 | id,
  data = panel,
  subset = (panel$solo_auth == 0 & panel$covid_paper == 0)
)
summary(mod4)
stargazer(
  mod4,
  type = "text",
  dep.var.labels = c("Whether a Paper is Submitted"),
  title = "The Likelihood of an All-Woman Team Paper (Given a Team Authored Paper) Pre- and Post- Covid",
  digits = 1,
  out = "./PAGmod4.html",
  covariate.labels = c(
    "All-Women Team Paper",
    "Covid-19",
    "All-Women Team Paper X Covid-19"
  )
)

###Below is the code for the difference-in-differences analysis looking at authors.

# Build an author-by-day panel: one row for every combination of an author id
# and a calendar day in the study window.
author_ids <- unique(by_author[["id_number"]])

# NOTE(review): the window endpoints are parsed as day/month/year (2 Jan 2015
# to 11 Aug 2022), whereas initial_date_submitted below is parsed as
# month/day/year -- confirm that this is the intended window.
dates2 <- seq(
  from = as.Date("2/1/2015", format = "%d/%m/%Y"),
  to = as.Date("11/8/2022", format = "%d/%m/%Y"),
  by = "days"
)

# The two inputs share no column names, so merge() returns their Cartesian
# product (every author id paired with every date).
panel2 <- merge(author_ids, dates2)
names(panel2) <- c("id_number", "date")

# Attach the author-level covariates to every author-day row (left join on
# id_number).
panel2 <- merge(
  x = panel2,
  y = by_author[, c("id_number", "gender", "initial_date_submitted",
                    "covid_paper")],
  by = "id_number",
  all.x = TRUE
)

# After the merge, id_number is column 1 and date is column 2; sort on both.
panel2 <- panel2[order(panel2[["id_number"]], panel2[["date"]]), ]

# covid: 1 for days on or after 14 March 2020, 0 before.
panel2[["covid"]] <- with(
  panel2,
  ifelse(test = date >= as.Date("2020-03-14"), yes = 1, no = 0)
)

panel2[["initial_date_submitted"]] <- as.Date(
  panel2[["initial_date_submitted"]],
  format = "%m/%d/%Y"
)

# submitted: 1 from the initial submission date onward, 0 before.
panel2[["submitted"]] <- with(
  panel2,
  ifelse(test = date >= initial_date_submitted, yes = 1, no = 0)
)

#Model for Table A1: The Likelihood of an Author Being a Woman Pre- and Post- Covid
# Regresses the daily `submitted` indicator on gender interacted with `covid`.
# The formula parts `| 0 | 0 | id_number` request no fixed effects, no
# instruments, and standard errors clustered by author id. Only rows with
# covid_paper == 0 are used. lfe is attached once at the top of the script, so
# it is not re-loaded here.
mod5 <- felm(
  submitted ~ gender * covid | 0 | 0 | id_number,
  data = panel2,
  subset = panel2$covid_paper == 0
)
summary(mod5)
stargazer(
  mod5,
  type = "text",
  dep.var.labels = c("Whether a Paper is Submitted"),
  title = "The Likelihood of an Author Being a Woman Pre and Post Covid",
  digits = 1,
  out = "./PAGmod5.html",
  covariate.labels = c(
    "Author Gender",
    "Covid-19",
    "Author Gender X Covid"
  )
)
